import os
import time
from urllib.parse import urljoin

import pyperclip
from requests_html import HTMLSession
'''
http://www.shulinw.com书林网
'''

# Scrape a novel from http://www.shulinw.com into a single UTF-8 text file.
# The URL of the book's chapter-index page is read from the clipboard; every
# chapter linked from that index is fetched in order and appended to
# "<book title>.txt" inside the download directory.

# Site root against which the chapter hrefs are resolved.
BASE_URL = 'http://www.shulinw.com'

# Create the HTTP session that is reused for every request.
session = HTMLSession()
# Strip surrounding whitespace so a copied trailing newline does not end up
# in the request URL.
url = pyperclip.paste().strip()
# Send a GET request for the chapter-index page.
page = session.get(url)
page.encoding = 'utf-8'
# Named chapter_list (not `list`) so the builtin is not shadowed.
chapter_list = page.html.find('div#list-chapterAll', first=True)
content = chapter_list.find('dl.panel-chapterlist', first=True)
name = page.html.find('h1.bookTitle', first=True).text
print(name)
# Download directory; created up front so opening the output file cannot
# fail because the folder is missing.
path = r'D:\.爬取为txt'
os.makedirs(path, exist_ok=True)
# os.path.join supplies the separator itself, so the file name no longer
# starts with the stray space that the old "\ " concatenation produced.
out_file = os.path.join(path, f'{name}.txt')

a_s = content.find('a')
# Append mode: running the script again adds to an existing file instead of
# truncating it. The file is opened once for the whole run rather than once
# per chapter.
with open(out_file, 'a', encoding='utf-8') as f:
    for i, a in enumerate(a_s):
        href = a.attrs['href']
        chap = a.text
        print('href ', i, href, chap)
        # urljoin yields BASE_URL + href for root-relative links and leaves
        # already-absolute links intact.
        _page = session.get(urljoin(BASE_URL, href))
        _page.encoding = 'utf-8'
        text = _page.html.find('div#htmlContent', first=True).text
        # Blank line, chapter title, blank line, then the chapter body.
        f.write('\n')
        f.write(chap)
        f.write('\n\n')
        f.write(text)
        # Pause between chapter requests.
        time.sleep(0.5)